Constructor Ranking, Driver
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
library(ggplot2)
library(magrittr)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(dplyr)
library(tidymodels)
## Registered S3 method overwritten by 'tune':
## method from
## required_pkgs.model_spec parsnip
## ── Attaching packages ────────────────────────────────────── tidymodels 0.1.4 ──
## ✓ broom 0.7.9 ✓ rsample 0.1.1
## ✓ dials 0.0.10 ✓ tibble 3.1.6
## ✓ infer 1.0.0 ✓ tidyr 1.2.0
## ✓ modeldata 0.1.1 ✓ tune 0.1.6
## ✓ parsnip 0.1.7 ✓ workflows 0.2.4
## ✓ purrr 0.3.4 ✓ workflowsets 0.1.0
## ✓ recipes 0.1.17 ✓ yardstick 0.0.8
## Warning: package 'tidyr' was built under R version 4.1.2
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## x purrr::discard() masks scales::discard()
## x tidyr::extract() masks magrittr::extract()
## x plotly::filter() masks dplyr::filter(), stats::filter()
## x recipes::fixed() masks stringr::fixed()
## x dplyr::lag() masks stats::lag()
## x purrr::set_names() masks magrittr::set_names()
## x recipes::step() masks stats::step()
## • Use tidymodels_prefer() to resolve common conflicts.
Read in the constructor, standings, results, race, circuit, and status data
## Directory that holds the raw CSV files. Every table below is read from
## here, so pointing the analysis at another checkout means editing one line.
raw_data_dir <- "/Users/nikkigerjarusak/desktop/DataVizProj/group_f_f1/raw_data"

## Load the six raw tables used throughout the rest of the script.
constructors <- read.csv(file.path(raw_data_dir, "constructors.csv"))
standings <- read.csv(file.path(raw_data_dir, "constructor_standings.csv"))
results <- read.csv(file.path(raw_data_dir, "results.csv"))
races <- read.csv(file.path(raw_data_dir, "races.csv"))
circuits <- read.csv(file.path(raw_data_dir, "circuits.csv"))
status <- read.csv(file.path(raw_data_dir, "status2.csv"))
## Join constructors to their standings rows and then to the matching race,
## keeping only the columns the constructor charts need.
## `name.x` / `name.y` are the suffixes left_join() adds when both sides carry
## a `name` column (presumably constructor name and race name -- confirm).
df <- constructors %>%
  left_join(standings, by = "constructorId") %>%
  left_join(races, by = "raceId") %>%
  subset(select = c(
    constructorId, constructorRef, name.x, constructorStandingsId,
    raceId, points, position, year, circuitId, name.y, wins
  ))
## subset results
##results <- subset(results, select = c(constructorId, statusId))
## merge again with results to get standingId
## df <- left_join(df, results, by = "constructorId")
## Sum `wins` per constructor over every row of `df`, drop constructors whose
## total is zero (or NA), and order from most to fewest.
## NOTE(review): `wins` comes from constructor_standings.csv. If that table
## stores a running season total per race, summing it overstates the real
## number of wins -- confirm against the data dictionary.
c_winnings <- df %>%
  group_by(constructorId, name.x) %>%
  summarize(totalwins = sum(wins)) %>%
  filter(totalwins != 0) %>%
  arrange(desc(totalwins))
## `summarise()` has grouped output by 'constructorId'. You can override using the `.groups` argument.

## Bar chart of the summed wins for every constructor with a non-zero total.
win_plot <- ggplot(c_winnings, aes(name.x, totalwins, fill = name.x)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme(axis.text.x = element_text(angle = 45)) +
  labs(
    title = "All Time Wins by Constructor",
    x = "Constructors",
    y = "Number of Wins"
  )
ggplotly(win_plot)

## The table is already sorted, so the first ten rows are the top ten.
top_c_wins <- c_winnings %>%
  ungroup() %>%
  slice_head(n = 10)

top_win_plot <- ggplot(top_c_wins, aes(name.x, totalwins, fill = name.x)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme(axis.text.x = element_text(angle = 45)) +
  labs(
    title = "All Time Top 10 Winningest Constructors",
    x = "Constructors",
    y = "Number of Wins"
  )
ggplotly(top_win_plot)
## Sum of `points` per constructor, restricted to totals above 700.
total_points <- df %>%
  group_by(constructorId, name.x) %>%
  summarize(totalpoints = sum(points)) %>%
  filter(totalpoints > 700) %>%
  arrange(desc(totalpoints))
## `summarise()` has grouped output by 'constructorId'. You can override using the `.groups` argument.

## Mean of `points` per constructor, restricted to means above 10.
average_points <- df %>%
  group_by(constructorId, name.x) %>%
  summarize(avg_points = mean(points)) %>%
  filter(avg_points > 10) %>%
  arrange(desc(avg_points))
## `summarise()` has grouped output by 'constructorId'. You can override using the `.groups` argument.

## Total points: every constructor that passed the 700-point cut-off.
total_plot <- ggplot(total_points, aes(name.x, totalpoints, fill = name.x)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme(axis.text.x = element_text(angle = 45)) +
  labs(
    title = "All Time Total Points Earned by Constructor",
    x = "Constructors",
    y = "Total Points"
  )
ggplotly(total_plot)

## top 10 constructors by total points earned
top_points <- total_points %>%
  ungroup() %>%
  slice_head(n = 10)

top_points_plot <- ggplot(top_points, aes(name.x, totalpoints, fill = name.x)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme(axis.text.x = element_text(angle = 45)) +
  labs(
    title = "Top 10 Constructors of All Time by Points",
    x = "Constructors",
    y = "Total Points"
  )
ggplotly(top_points_plot)

## Average points: every constructor that passed the 10-point cut-off.
avg_points_plot <- ggplot(average_points, aes(name.x, avg_points, fill = name.x)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme(axis.text.x = element_text(angle = 45)) +
  labs(
    title = "All Time Average Points Earned by Constructor",
    x = "Constructors",
    y = "Average Points"
  )
ggplotly(avg_points_plot)

## top 10 constructors by average points
top_avg_points <- average_points %>%
  ungroup() %>%
  slice_head(n = 10)

top_avg_plot <- ggplot(top_avg_points, aes(name.x, avg_points, fill = name.x)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme(axis.text.x = element_text(angle = 45)) +
  labs(
    title = "Top 10 Constructors of All Time by Average Points Earned",
    x = "Constructors",
    y = "Average Points"
  )
ggplotly(top_avg_plot)
## Season subsets of the constructor frame, reused by the per-season charts.
## 2019 season
season_19 <- df %>%
  filter(year == 2019)
## 2021 season
season_21 <- df %>%
  filter(year == 2021)

## Sum `wins` per constructor for 2019, dropping constructors with a zero
## total.
## NOTE(review): if `wins` is a running season total per race, this sum is
## larger than the number of races actually won -- confirm.
winnings_19 <- season_19 %>%
  group_by(constructorId, name.x) %>%
  summarize(totalwins = sum(wins)) %>%
  arrange(desc(totalwins)) %>%
  filter(totalwins != 0)
## `summarise()` has grouped output by 'constructorId'. You can override using the `.groups` argument.

## scale_fill_manual() matches colours by constructor name, so each key must
## equal the value in `name.x` exactly.
win_19_plot <- ggplot(winnings_19, aes(x = name.x, y = totalwins, fill = name.x)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme(axis.text.x = element_text(angle = 45)) +
  scale_fill_manual(values = c(
    "Ferrari" = "#ED1C24",
    "Mercedes" = "#6CD3BF",
    "Red Bull" = "#1E5BC6"
  )) +
  labs(
    x = "Constructors", y = "Number of Wins", fill = "Constructor Names",
    title = "2019 Wins by Constructor"
  )
ggplotly(win_19_plot)

## Same aggregation for the 2021 season.
winnings_21 <- season_21 %>%
  group_by(constructorId, name.x) %>%
  summarize(totalwins = sum(wins)) %>%
  arrange(desc(totalwins)) %>%
  filter(totalwins != 0)
## `summarise()` has grouped output by 'constructorId'. You can override using the `.groups` argument.

## The Alpine key was previously misspelled ("Apline F1 Team"), so it never
## matched the spelling used by the 2021 points palettes and the bar was left
## without its team colour.
win_21_plot <- ggplot(winnings_21, aes(x = name.x, y = totalwins, fill = name.x)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme(axis.text.x = element_text(angle = 45)) +
  scale_fill_manual(values = c(
    "Alpine F1 Team" = "#2293D1",
    "McLaren" = "#F58020",
    "Mercedes" = "#6CD3BF",
    "Red Bull" = "#1E5BC6"
  )) +
  labs(
    x = "Constructors", y = "Number of Wins", fill = "Constructor Names",
    title = "2021 Wins by Constructor"
  )
ggplotly(win_21_plot)
## Sum of `points` per constructor over the 2019 rows, largest first.
points_19 <- season_19 %>%
  group_by(constructorId, name.x) %>%
  summarize(totalpoints = sum(points)) %>%
  arrange(desc(totalpoints))
## `summarise()` has grouped output by 'constructorId'. You can override using the `.groups` argument.

## Mean of `points` per constructor over the 2019 rows, largest first.
avg_points_19 <- season_19 %>%
  group_by(constructorId, name.x) %>%
  summarize(avg_points = mean(points)) %>%
  arrange(desc(avg_points))
## `summarise()` has grouped output by 'constructorId'. You can override using the `.groups` argument.

## Team colours for the 2019 constructors, keyed by the exact `name.x` value.
## Defined once so the total and average charts cannot drift apart.
team_colors_19 <- c(
  "Alfa Romeo" = "#B12039",
  "Ferrari" = "#ED1C24",
  "Haas F1 Team" = "#B6BABD",
  "McLaren" = "#F58020",
  "Mercedes" = "#6CD3BF",
  "Racing Point" = "#F596C8",
  "Red Bull" = "#1E5BC6",
  "Renault" = "#FFF500",
  "Toro Rosso" = "#469BFF",
  "Williams" = "#37BEDD"
)

points_19_plot <- ggplot(points_19, aes(x = name.x, y = totalpoints, fill = name.x)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme(axis.text.x = element_text(angle = 45)) +
  scale_fill_manual(values = team_colors_19) +
  labs(
    x = "Constructors", y = "Total Points", fill = "Constructor Names",
    title = "Total Points Earned by Constructors in 2019"
  )
ggplotly(points_19_plot)

avg_points_19_plot <- ggplot(avg_points_19, aes(x = name.x, y = avg_points, fill = name.x)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme(axis.text.x = element_text(angle = 45)) +
  scale_fill_manual(values = team_colors_19) +
  labs(
    x = "Constructors", y = "Average Points", fill = "Constructor Names",
    title = "Average Points Earned by Constructors in 2019"
  )
ggplotly(avg_points_19_plot)
## Sum of `points` per constructor over the 2021 rows, largest first.
## Constructors whose total is zero are removed before plotting.
points_21 <- season_21 %>%
  group_by(constructorId, name.x) %>%
  summarize(totalpoints = sum(points)) %>%
  arrange(desc(totalpoints)) %>%
  filter(totalpoints != 0)
## `summarise()` has grouped output by 'constructorId'. You can override using the `.groups` argument.

## Mean of `points` per constructor over the 2021 rows, zero means removed.
avg_points_21 <- season_21 %>%
  group_by(constructorId, name.x) %>%
  summarize(avg_points = mean(points)) %>%
  arrange(desc(avg_points)) %>%
  filter(avg_points != 0)
## `summarise()` has grouped output by 'constructorId'. You can override using the `.groups` argument.

## Team colours for the 2021 constructors, keyed by the exact `name.x` value.
## Defined once so the total and average charts cannot drift apart.
## NOTE(review): "Haas F1 Team" appears in the 2021 status table but has no
## entry here; presumably the zero filters above remove it -- confirm.
team_colors_21 <- c(
  "Alfa Romeo" = "#B12039",
  "AlphaTauri" = "#4E7C9B",
  "Alpine F1 Team" = "#2293D1",
  "Aston Martin" = "#2D826D",
  "Ferrari" = "#ED1C24",
  "McLaren" = "#F58020",
  "Mercedes" = "#6CD3BF",
  "Red Bull" = "#1E5BC6",
  "Williams" = "#37BEDD"
)

points_21_plot <- ggplot(points_21, aes(x = name.x, y = totalpoints, fill = name.x)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme(axis.text.x = element_text(angle = 45)) +
  scale_fill_manual(values = team_colors_21) +
  labs(
    x = "Constructors", y = "Total Points", fill = "Constructor Names",
    title = "Total Points Earned by Constructors in 2021"
  )
ggplotly(points_21_plot)

avg_points_21_plot <- ggplot(avg_points_21, aes(x = name.x, y = avg_points, fill = name.x)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme(axis.text.x = element_text(angle = 45)) +
  scale_fill_manual(values = team_colors_21) +
  labs(
    x = "Constructors", y = "Average Points", fill = "Constructor Names",
    title = "Average Points Earned by Constructors in 2021"
  )
ggplotly(avg_points_21_plot)
## Result-level frame: each results row gains its status label, constructor
## details, and race details, then is trimmed to the columns used by the
## status analysis below.
df2 <- results %>%
  left_join(status, by = "statusId") %>%
  left_join(constructors, by = "constructorId") %>%
  left_join(races, by = "raceId") %>%
  subset(select = c(
    resultId, raceId, driverId, constructorId,
    position, points, statusId, status, constructorRef, name.x,
    year, circuitId, name.y
  ))
## Number of 2019 result rows per constructor and status (long format, still
## grouped by `name.x` after summarize()).
status_19 <- df2 %>%
  filter(year == 2019) %>%
  group_by(name.x, status) %>%
  summarize(status_count = n())
## `summarise()` has grouped output by 'name.x'. You can override using the `.groups` argument.

## Wide view of the same counts: one row per constructor, one column per
## status. Derived from `status_19` instead of repeating the aggregation;
## combinations that never occurred show up as NA.
status_count_19 <- status_19 %>%
  pivot_wider(
    names_from = status,
    values_from = status_count
  )
status_count_19
## # A tibble: 10 × 7
## # Groups: name.x [10]
## name.x Accident `Car Failure` Collision DNF Finished Disqualified
## <chr> <int> <int> <int> <int> <int> <int>
## 1 Alfa Romeo 1 1 1 27 12 NA
## 2 Ferrari 1 2 3 3 33 NA
## 3 Haas F1 Team NA 7 3 21 11 NA
## 4 McLaren NA 6 3 14 18 1
## 5 Mercedes 1 1 NA 1 39 NA
## 6 Racing Point NA 3 1 24 14 NA
## 7 Red Bull 1 2 1 5 33 NA
## 8 Renault 1 5 2 17 15 2
## 9 Toro Rosso NA 2 3 20 17 NA
## 10 Williams NA 3 1 34 4 NA
## plot constructor status for 2019
## Horizontal stacked bars: one bar per constructor, one segment per status.
status_19_plot <- ggplot(status_19, aes(x = name.x, y = status_count, fill = status)) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(values = c(
    "Accident" = "#000000",
    "Car Failure" = "#47ABFF",
    "Collision" = "#FF0000",
    "Disqualified" = "#505050",
    "DNF" = "#ADADAD",
    "Finished" = "#1BBC00"
  )) +
  labs(
    x = "Constructor", y = "Status Count",
    title = "2019 Constructor Status"
  )
ggplotly(status_19_plot)
## Per-team status breakdown for 2019, used by the pie charts.
## The same pipeline is applied to each of the four teams:
##   1. keep the team's 2019 rows and drop rows containing NA,
##   2. count rows per status,
##   3. sort by count (largest first),
##   4. add `prop` (share of the team's rows, in percent) and `ypos`
##      (midpoint of each slice on the cumulative scale, for label placement).
## After summarize() the data is still grouped by `name.x`, which holds a
## single team here, so sum(status_count) is the team total.
status_2019_by_team <- lapply(
  c(ferrari = "Ferrari", rb = "Red Bull", merc = "Mercedes", mcl = "McLaren"),
  function(team_name) {
    df2 %>%
      filter(name.x == team_name, year == 2019) %>%
      na.omit() %>%
      group_by(name.x, status) %>%
      summarize(status_count = n(), .groups = "drop_last") %>%
      arrange(desc(status_count)) %>%
      mutate(
        prop = status_count / sum(status_count) * 100,
        ypos = cumsum(prop) - 0.5 * prop
      )
  }
)

## ferrari
ferrari_status_2019 <- status_2019_by_team[["ferrari"]]
## ferrari_2019_pie <- ggplot(ferrari_status_2019, aes(x="", y= prop, fill = status)) +
## geom_bar(stat="identity", width=1) +
## coord_polar("y", start=0) +
## theme_void() +
## geom_text(aes(y = ypos, label = status_count), color = "white", size = 3) +
## labs(title="Ferrari 2019 Statuses")
## ferrari_2019_pie
## red bull
rb_status_2019 <- status_2019_by_team[["rb"]]
## mercedes
merc_status_2019 <- status_2019_by_team[["merc"]]
## McLaren
mcl_status_2019 <- status_2019_by_team[["mcl"]]
## Status colours keyed by status name (same palette as the 2019 stacked bar
## chart). Looking colours up by name keeps each slice's colour correct
## regardless of how the per-team tables happen to be sorted.
status_pie_colors <- c(
  "Accident" = "#000000",
  "Car Failure" = "#47ABFF",
  "Collision" = "#FF0000",
  "Disqualified" = "#505050",
  "DNF" = "#ADADAD",
  "Finished" = "#1BBC00"
)

## Colours for one team's slices, in the row order of its status table.
slice_colors <- function(team_status) {
  unname(status_pie_colors[as.character(team_status$status)])
}

## 2 x 2 grid of pies: Ferrari and Red Bull on the top row, Mercedes and
## McLaren on the bottom row. Each pie shows the share of each status.
pie19 <- plot_ly(
  labels = ~status, values = ~prop,
  textposition = "inside", textinfo = "label+percent"
) %>%
  add_pie(
    data = ferrari_status_2019, name = "Ferrari",
    marker = list(colors = slice_colors(ferrari_status_2019)),
    domain = list(row = 0, column = 0)
  ) %>%
  add_pie(
    data = rb_status_2019, name = "Red Bull",
    marker = list(colors = slice_colors(rb_status_2019)),
    domain = list(row = 0, column = 1)
  ) %>%
  add_pie(
    data = merc_status_2019, name = "Mercedes",
    marker = list(colors = slice_colors(merc_status_2019)),
    domain = list(row = 1, column = 0)
  ) %>%
  add_pie(
    data = mcl_status_2019, name = "McLaren",
    marker = list(colors = slice_colors(mcl_status_2019)),
    domain = list(row = 1, column = 1)
  ) %>%
  layout(
    title = "2019 Constructor Statuses",
    showlegend = TRUE,
    grid = list(rows = 2, columns = 2),
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    ## Team captions, positioned as fractions of the whole figure; "paper"
    ## is the plotly reference for figure-relative coordinates (was
    ## misspelled "papper").
    annotations = list(
      x = c(.004, .55, .0002, .55),
      y = c(.78, .78, .22, .22),
      text = c("Ferrari", "Red Bull", "Mercedes", "McLaren"),
      xref = "paper",
      yref = "paper",
      showarrow = FALSE
    )
  )
pie19
## Number of 2021 result rows per constructor and status (long format, still
## grouped by `name.x` after summarize()).
status_2021 <- df2 %>%
  filter(year == 2021) %>%
  group_by(name.x, status) %>%
  summarize(status_count = n())
## `summarise()` has grouped output by 'name.x'. You can override using the `.groups` argument.

## Wide view of the same counts: one row per constructor, one column per
## status. Derived from `status_2021` instead of repeating the aggregation;
## combinations that never occurred show up as NA.
status_count_2021 <- status_2021 %>%
  pivot_wider(
    names_from = status,
    values_from = status_count
  )
status_count_2021
## # A tibble: 10 × 7
## # Groups: name.x [10]
## name.x `Car Failure` Collision DNF Finished Accident Disqualified
## <chr> <int> <int> <int> <int> <int> <int>
## 1 Alfa Romeo 2 1 31 10 NA NA
## 2 AlphaTauri 6 2 16 20 NA NA
## 3 Alpine F1 Team 4 1 19 20 NA NA
## 4 Aston Martin 2 4 20 16 1 1
## 5 Ferrari 1 1 7 35 NA NA
## 6 Haas F1 Team 3 3 30 5 2 NA
## 7 McLaren 1 1 14 28 NA NA
## 8 Mercedes 2 3 1 38 NA NA
## 9 Red Bull 1 4 2 36 1 NA
## 10 Williams 6 3 24 9 2 NA
## plot constructor status for 2021
## Horizontal stacked bars: one bar per constructor, one segment per status.
status_2021_plot <- ggplot(status_2021, aes(x = name.x, y = status_count, fill = status)) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(values = c(
    "Accident" = "#000000",
    "Car Failure" = "#47ABFF",
    "Collision" = "#FF0000",
    "Disqualified" = "#505050",
    "DNF" = "#ADADAD",
    "Finished" = "#1BBC00"
  )) +
  labs(
    x = "Constructor", y = "Status Count",
    title = "2021 Constructor Status"
  )
ggplotly(status_2021_plot)
## Per-team status breakdown for 2021, used by the pie charts.
## The same pipeline is applied to each of the four teams:
##   1. keep the team's 2021 rows and drop rows containing NA,
##   2. count rows per status,
##   3. sort by count (largest first),
##   4. add `prop` (share of the team's rows, in percent) and `ypos`
##      (midpoint of each slice on the cumulative scale, for label placement).
## After summarize() the data is still grouped by `name.x`, which holds a
## single team here, so sum(status_count) is the team total.
status_2021_by_team <- lapply(
  c(ferrari = "Ferrari", rb = "Red Bull", merc = "Mercedes", mcl = "McLaren"),
  function(team_name) {
    df2 %>%
      filter(name.x == team_name, year == 2021) %>%
      na.omit() %>%
      group_by(name.x, status) %>%
      summarize(status_count = n(), .groups = "drop_last") %>%
      arrange(desc(status_count)) %>%
      mutate(
        prop = status_count / sum(status_count) * 100,
        ypos = cumsum(prop) - 0.5 * prop
      )
  }
)

## ferrari
ferrari_status_2021 <- status_2021_by_team[["ferrari"]]
## red bull
rb_status_2021 <- status_2021_by_team[["rb"]]
## mercedes
merc_status_2021 <- status_2021_by_team[["merc"]]
## McLaren
mcl_status_2021 <- status_2021_by_team[["mcl"]]
## Status colours keyed by status name (same palette as the 2021 stacked bar
## chart). Looking colours up by name keeps each slice's colour correct
## regardless of how the per-team tables happen to be sorted.
status_pie_colors <- c(
  "Accident" = "#000000",
  "Car Failure" = "#47ABFF",
  "Collision" = "#FF0000",
  "Disqualified" = "#505050",
  "DNF" = "#ADADAD",
  "Finished" = "#1BBC00"
)

## Colours for one team's slices, in the row order of its status table.
slice_colors <- function(team_status) {
  unname(status_pie_colors[as.character(team_status$status)])
}

## 2 x 2 grid of pies: Ferrari and Red Bull on the top row, Mercedes and
## McLaren on the bottom row. Each pie shows the share of each status.
pie21 <- plot_ly(
  labels = ~status, values = ~prop,
  textposition = "inside", textinfo = "label+percent"
) %>%
  add_pie(
    data = ferrari_status_2021, name = "Ferrari",
    marker = list(colors = slice_colors(ferrari_status_2021)),
    domain = list(row = 0, column = 0)
  ) %>%
  add_pie(
    data = rb_status_2021, name = "Red Bull",
    marker = list(colors = slice_colors(rb_status_2021)),
    domain = list(row = 0, column = 1)
  ) %>%
  add_pie(
    data = merc_status_2021, name = "Mercedes",
    marker = list(colors = slice_colors(merc_status_2021)),
    domain = list(row = 1, column = 0)
  ) %>%
  add_pie(
    data = mcl_status_2021, name = "McLaren",
    marker = list(colors = slice_colors(mcl_status_2021)),
    domain = list(row = 1, column = 1)
  ) %>%
  layout(
    title = "2021 Constructor Statuses",
    showlegend = TRUE,
    grid = list(rows = 2, columns = 2),
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    ## Team captions, positioned as fractions of the whole figure; "paper"
    ## is the plotly reference for figure-relative coordinates (was
    ## misspelled "papper").
    annotations = list(
      x = c(.004, .55, .0002, .55),
      y = c(.78, .78, .22, .22),
      text = c("Ferrari", "Red Bull", "Mercedes", "McLaren"),
      xref = "paper",
      yref = "paper",
      showarrow = FALSE
    )
  )
pie21